Table of Contents

1. 模型选择

划分训练集与测试集

from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation; a fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

2. 模型评估

2.1. 分类问题

from sklearn.metrics import accuracy_score

# Compare the test-set predictions against the held-out labels.
score = accuracy_score(y_test, y_pred)
score
>>> 
0.8349474338391424

from sklearn.metrics import classification_report

# The report is returned as formatted text (per-class precision, recall,
# f1-score and support), so it is printed rather than displayed as a repr.
report_str = classification_report(y_test, y_pred)
print(report_str)
>>>
              precision    recall  f1-score   support

         0.0       0.90      0.79      0.84     21521
         1.0       0.77      0.89      0.83     17097

    accuracy                           0.83     38618
   macro avg       0.84      0.84      0.83     38618
weighted avg       0.84      0.83      0.84     38618

# Confusion matrix: rows are true labels, columns are predicted labels.
from sklearn.metrics import confusion_matrix

array = confusion_matrix(y_test, y_pred)
array
>>>
array([[17097,  4424],
       [ 1950, 15147]])

# F1 score
# Import the function directly, like the other snippets; the original
# referenced `sklearn.metrics` without ever importing `sklearn`.
from sklearn.metrics import f1_score

# Keep the result under its own name so the imported f1_score function is
# not shadowed by a float and can be called again.
f1 = f1_score(y_test, y_pred)
f1
>>>



from sklearn.metrics import confusion_matrix

# Confusion matrix of the test-set predictions (rows: true, columns: predicted).
array = confusion_matrix(y_test, y_pred)

2.2. 回归指标

# Mean absolute error (MAE)
from sklearn.metrics import mean_absolute_error

# Score against y_test, the held-out labels produced by train_test_split,
# matching the mean-squared-error example.
mae = mean_absolute_error(y_test, y_pred)
mae
>>>
1.12

# Mean squared error (MSE)
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, y_pred)
mse
>>>
2.22

# R2 score (coefficient of determination)
from sklearn.metrics import r2_score

# Store the result under its own name: rebinding r2_score to the returned
# float would shadow the imported function and make any later call fail.
r2 = r2_score(y_test, y_pred)
r2
>>>
0.11

2.3. 聚类指标

# Adjusted Rand index
from sklearn.metrics import adjusted_rand_score
adjusted_rand_score(y_true, y_pred)
# Homogeneity
from sklearn.metrics import homogeneity_score
homogeneity_score(y_true, y_pred)
# V-measure
from sklearn.metrics import v_measure_score

# Call the imported function directly; no `metrics` module is imported in
# these notes, so `metrics.v_measure_score` would raise NameError.
v_measure_score(y_true, y_pred)